{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Predicting Fuel Consumption with Pytorch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "import torch, torchvision\n",
    "import torchvision.transforms as transforms\n",
    "from torch.autograd import Variable\n",
    "from torch import nn\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from torch.nn import Module\n",
    "from torch.utils.data.sampler import SubsetRandomSampler\n",
    "from torch.utils.data import DataLoader\n",
    "import os\n",
    "os.chdir('c:/users/nicolas/documents/data/thecarconnection')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# loading the data\n",
    "df = pd.read_csv('cleaned_encoded_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dropping columns, assigning input/output\n",
    "x = df.drop(['Make', 'Model', 'Style Name', 'EPA Fuel Economy Est - Hwy (MPG)',\n",
    "             'EPA Fuel Economy Est - City (MPG)'], axis=1).values\n",
    "y = df['EPA Fuel Economy Est - Hwy (MPG)'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# casting the input/output\n",
    "x = x.astype(np.float32)\n",
    "y = y.astype(np.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# rescale the predictors\n",
    "scaler = MinMaxScaler()\n",
    "x = scaler.fit_transform(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# send to cuda if available\n",
    "if torch.cuda.is_available():\n",
    "    x = torch.from_numpy(x)\n",
    "    y = torch.from_numpy(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create sampler\n",
    "class CarDataset():\n",
    "    \n",
    "    def __init__(self):\n",
    "        self.len = x.shape[0]\n",
    "        self.x = x\n",
    "        self.y = y\n",
    "        \n",
    "    def __getitem__(self, index):\n",
    "        return x[index], y[index].unsqueeze(0)\n",
    "    \n",
    "    def __len__(self):\n",
    "        return self.len"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create an instance of sampler\n",
    "data = CarDataset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create iterator\n",
    "train_loader = DataLoader(dataset=data, batch_size=8, shuffle=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create model class\n",
    "class Model(Module):\n",
    "    \n",
    "    def __init__(self):\n",
    "        super(Model, self).__init__()\n",
    "        self.l1 = torch.nn.Linear(270, 156)\n",
    "        self.l2 = torch.nn.Linear(156, 312)\n",
    "        self.l3 = torch.nn.Linear(312, 1024)\n",
    "        self.l4 = torch.nn.Linear(1024, 1)\n",
    "        \n",
    "        self.relu = torch.nn.ReLU()\n",
    "        \n",
    "    def forward(self, x):\n",
    "        out1 = self.relu(self.l1(x))\n",
    "        out2 = self.relu(self.l2(out1))\n",
    "        out3 = self.relu(self.l3(out2))\n",
    "        ypred = self.l4(out3)\n",
    "        return ypred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# instantiate the model\n",
    "model = Model()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# loss function and optimizer\n",
    "criterion = torch.nn.L1Loss()\n",
    "optimizer = torch.optim.Adam(params=model.parameters(), lr=.0001)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1 Loss: 0.996\n",
      "Epoch 2 Loss: 1.236\n",
      "Epoch 3 Loss: 1.253\n",
      "Epoch 4 Loss: 1.03\n",
      "Epoch 5 Loss: 0.613\n",
      "Epoch 6 Loss: 1.046\n",
      "Epoch 7 Loss: 1.132\n",
      "Epoch 8 Loss: 0.702\n",
      "Epoch 9 Loss: 1.585\n",
      "Epoch 10 Loss: 0.851\n"
     ]
    }
   ],
   "source": [
    "# train the model \n",
    "for epoch in range(1, 10+1):\n",
    "    for i, data in enumerate(train_loader):\n",
    "        inputs, labels = data\n",
    "        inputs, labels = Variable(inputs), Variable(labels)\n",
    "        ypred = model(inputs)\n",
    "        loss = criterion(ypred, labels)\n",
    "        optimizer.zero_grad()\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "    print('Epoch', str(epoch) + ' Loss:', np.round(loss.data.item(), 3))\n",
    "    if loss.data.item() < .5:\n",
    "        break"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
